library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(qiime2R)
library(ggpubr)
##
## Attaching package: 'ggpubr'
##
## The following object is masked from 'package:qiime2R':
##
## mean_sd
library(ggplot2)
library(magrittr)
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
library(ggh4x)
library(knitr)
library(DT)
# Needed File Paths ----
# Relative paths to the inputs: the sample metadata table and two distance
# matrices (the `uw_` / `w_` prefixes presumably stand for unweighted /
# weighted -- confirm against the pipeline that produced these files).
metadata_FP <- "../data/misc/s1_filt_comp_metadata.tsv"
uw_dist_fp <- "../data/s1_filt_core/uw_dist_matrix.tsv"
w_dist_fp <- "../data/s1_filt_core/w_dist_matrix.tsv"
# Reading in Distance Matrices and Metadata ----
# Load the tab-separated sample metadata; readr guesses the column types.
metadata <- readr::read_tsv(file = metadata_FP)
## Rows: 462 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): #SampleID, sample_type, facility, diet
## dbl (7): day_post_inf, mouse_id, high_fat, high_fiber, purified_diet, seq_de...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Give the `#SampleID` column a syntactic name so it can be referenced without
# backticks; every other column name is left untouched.
names(metadata) <- replace(
  names(metadata),
  names(metadata) == "#SampleID",
  "sampleid"
)
# Load the first distance matrix (wide layout: one column per sample).
uw_dist <- readr::read_tsv(file = uw_dist_fp)
## New names:
## Rows: 459 Columns: 460
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): ...1 dbl (459): 2_0418_9740, 2_0418_9741, 2_0418_9742, 2_0418_9743,
## 2_0418_9744, ...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# The matrix's first column has an empty header, which readr auto-names `...1`;
# label it as the row-side sample ID.
names(uw_dist) <- replace(
  names(uw_dist),
  names(uw_dist) == "...1",
  "row_sampleid"
)
# Load the second distance matrix (same wide layout).
w_dist <- readr::read_tsv(file = w_dist_fp)
## New names:
## Rows: 459 Columns: 460
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (1): ...1 dbl (459): 2_0418_9740, 2_0418_9741, 2_0418_9742, 2_0418_9743,
## 2_0418_9744, ...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Same fix-up as for the other matrix: the unnamed first column (auto-named
# `...1` by readr) becomes the row-side sample ID.
names(w_dist) <- replace(
  names(w_dist),
  names(w_dist) == "...1",
  "row_sampleid"
)
# Data Wrangling ----
# Slim metadata lookup: only the columns used to annotate each side of a
# pairwise comparison.
mini_meta <- select(metadata, sampleid, sample_type, mouse_id, diet)
# Reshape the wide sample-by-sample matrix into one row per ordered
# (row_sampleid, col_sampleid) pair and annotate both samples of each pair with
# their metadata. After the second join, `.x` columns describe the row sample
# and `.y` columns describe the column sample.
# NOTE: this overwrites the wide `uw_dist` with the long table, so the step
# cannot be re-run without first re-reading the matrix.
uw_dist <- uw_dist %>%
  # pivot_longer() is the maintained replacement for the superseded gather().
  pivot_longer(
    cols = -row_sampleid,
    names_to = "col_sampleid",
    values_to = "dist"
  ) %>%
  # Inner joins drop pairs whose sample is missing from the metadata.
  # `relationship = "many-to-one"` makes a duplicated metadata sample ID an
  # error instead of silently duplicating distance rows.
  inner_join(
    mini_meta,
    by = c("row_sampleid" = "sampleid"),
    relationship = "many-to-one"
  ) %>%
  inner_join(
    mini_meta,
    by = c("col_sampleid" = "sampleid"),
    suffix = c(".x", ".y"),
    relationship = "many-to-one"
  ) %>%
  # Composite keys identifying each pair by sample, sample type and mouse.
  mutate(
    dist_key = paste(row_sampleid, col_sampleid, sep = "_"),
    samp_type_key = paste(sample_type.x, sample_type.y, sep = "_"),
    mouse_id_key = paste(mouse_id.x, mouse_id.y, sep = "_")
  )
## pairs where both samples are of type 'colon' and come from the same mouse
## (labelled as stool pairs -- presumably colon samples are the stool samples;
## confirm). Rows pairing a sample with itself are not excluded here.
uw_dist_mouse <- uw_dist %>%
  filter(
    sample_type.x == "colon" & sample_type.y == "colon",
    mouse_id.x == mouse_id.y
  ) %>%
  mutate(class = "stool_same_mouse")
## pairs of samples with different sample types taken from the same mouse
## (labelled as matched cecal/stool pairs)
uw_dist_cecStool_match <- uw_dist %>%
  filter(sample_type.x != sample_type.y & mouse_id.x == mouse_id.y) %>%
  mutate(class = "cecal_stool_matched")
## pairs of samples with different sample types taken from different mice
## (labelled as unmatched cecal/stool pairs)
uw_dist_cecStool_unmatch <- uw_dist %>%
  filter(sample_type.x != sample_type.y & mouse_id.x != mouse_id.y) %>%
  mutate(class = "cecal_stool_unmatched")
## stack the three comparison tables into one, then drop rows that compare a
## sample with itself
## NOTE(review): if the distance matrix is symmetric, every remaining pair is
## presumably still present in both orientations (a-b and b-a) -- confirm
## whether downstream statistics should count each pair only once.
uw_dist_giant <- rbind(
  uw_dist_mouse,
  uw_dist_cecStool_match,
  uw_dist_cecStool_unmatch
) %>%
  filter(row_sampleid != col_sampleid)
# Tables I Created ----
# Render each comparison table as an interactive DT widget for inspection.
# These are the per-class tables, not the combined `uw_dist_giant`.
DT::datatable(uw_dist_mouse)
DT::datatable(uw_dist_cecStool_match)
# NOTE(review): the rendered output shows a "data is too big for client-side
# DataTables" warning after these calls -- presumably triggered by this table;
# consider showing a subset or using server-side processing.
DT::datatable(uw_dist_cecStool_unmatch)
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
# Attempt at Some Kind of Plot ----
# Boxplot of pairwise distance for each comparison class, one filled box per
# class. The jitter layer and the per-diet facet are kept disabled, as in the
# original.
ggplot(data = uw_dist_giant, mapping = aes(x = class, y = dist)) +
  geom_boxplot(mapping = aes(group = class, fill = class), alpha = 0.5) +
  # geom_jitter(alpha = 0.3, width = 0.1, height = 0) +
  theme_bw()
# facet_wrap(~diet.x)